# study area
# studyarea <- st_read("data/StudyArea.shp") %>%
studyarea <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp") %>%
st_transform('EPSG:2272')
## Reading layer `StudyArea' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/studyarea/StudyArea.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8367662 ymin: 4859107 xmax: -8365440 ymax: 4860628
## Projected CRS: WGS 84 / Pseudo-Mercator
# Philly block groups
# blockgroups <- st_read("data/Philly_blockgroup.shp") %>%
blockgroups <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
st_transform('EPSG:2272')
## Reading layer `Philly_blockgroup' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Philly_blockgroup/Philly_blockgroup.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1338 features and 18 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -8380161 ymin: 4846701 xmax: -8344037 ymax: 4886015
## Projected CRS: WGS 84 / Pseudo-Mercator
# philly bounds
philly_bounds <- st_union(blockgroups)
# philly hydrology (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
## Reading layer `OGRGeoJSON' from data source
## `https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson'
## using driver `GeoJSON'
## Simple feature collection with 7970 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -76.18462 ymin: 38.7667 xmax: -74.71871 ymax: 40.66593
## Geodetic CRS: WGS 84
# highway
# stateroads_inphilly <- st_read("data/PaStateRoads2024_03.geojson") %>%
# st_transform('EPSG:2272') %>%
# st_intersection(philly_bounds)
# write out stateroads_inphilly as its own file since it's big
# st_write(stateroads_inphilly,
# "data/PhillyStateRoads.shp")
# read in pre-filtered stateroads data
stateroads_inphilly <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp")
## Reading layer `PhillyStateRoads' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/PhillyStateRoads.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2315 features and 105 fields
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 2660586 ymin: 207551.4 xmax: 2749290 ymax: 304306.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
philly_mainhighways <- stateroads_inphilly %>%
filter(TRAF_RT_NO %in% c("I", "US"),
ST_RT_NO %in% c("0001", "0095", "0076", "0676")) %>%
dplyr::select(STREET_NAM, ST_RT_NO, TRAF_RT_NO, TRAF_RT__1)
# save and only use the highways of interest
# for now, interested in comparing highways that cut through neighborhoods vs those that don't
# ACS
# philly property data (https://opendataphilly.org/datasets/philadelphia-properties-and-assessment-history/) <-- that was the original source, but now i'm using the same downloaded file that Luming and Sijia have been working from
# philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/opa_properties_public.geojson") %>%
# st_transform(crs = 'EPSG:2272')
# new property CSV
# this is a CSV and doesn't have the right geometry col
# philly_properties_new <- read_csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250307.csv") %>%
# rename(old_geom = geometry) %>%
# left_join(philly_properties %>%
# select(objectid, geometry), # joining with original properties data to get geometries
# by = "objectid") %>%
# st_as_sf(crs = 'EPSG:2272')
# write out file as geojson
# st_write(philly_properties_new,
# "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250307.geojson")
# new property data (from 3/7/25 with inflation adjusted property prices)
philly_properties <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250318.geojson")
## Reading layer `property_philly' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/property_philly_250318.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 90508 features and 75 fields (with 13 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2662223 ymin: 211097.5 xmax: 2749274 ymax: 304860.8
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
## Reading layer `PPR_Properties' from data source
## `https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 507 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -75.28353 ymin: 39.87117 xmax: -74.95865 ymax: 40.13186
## Geodetic CRS: WGS 84
# philly schools
school <-
st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `Schools' from data source
## `https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 495 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.2665 ymin: 39.90781 xmax: -74.97057 ymax: 40.12974
## Geodetic CRS: WGS 84
# city facilities (https://opendataphilly.org/datasets/city-facilities-master-facilities-database/)
facilities <- st_read("https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson") %>%
st_transform('EPSG:2272') %>%
filter(STATUS == "A") # remove inactive sites
## Reading layer `City_Facilities_pub' from data source
## `https://opendata.arcgis.com/datasets/b3c133c3b15d4c96bcd4d5cc09f19f4e_0.geojson'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 3197 features and 32 fields (with 5 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.28169 ymin: 39.86686 xmax: -74.96455 ymax: 40.13108
## Geodetic CRS: WGS 84
# libraries (from facilities data)
libraries <- facilities %>%
filter(ASSET_GROUP1 == "A8")
# prisons (from facilities data)
prisons <- facilities %>%
filter(ASSET_GROUP2 == "A13.3")
# historic site (from facilities data)
historic <- facilities %>%
filter(ASSET_SUBTYPE1 == "C21")
# museums (from facilities data)
museums <- facilities %>%
filter(ASSET_GROUP1 == "A18",
ASSET_SUBTYPE1 == "C35")
# produce (LPSS, HPSS -- indicators for produce; low or high produce supply stores)
retail_produce <- st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `370eb703-5a4f-4abb-8920-727cef31373b2020329-1-rcimn.5n39o' from data source `https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 1336 features and 16 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -75.28031 ymin: 39.86747 xmax: -74.95575 ymax: 40.13793
## Geodetic CRS: WGS 84
# transit stops
metro <- st_read("https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272') %>%
mutate(Type = "metro")
## Reading layer `Highspeed_Stations' from data source
## `https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326'
## using driver `GeoJSON'
## Simple feature collection with 74 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.35358 ymin: 39.90543 xmax: -75.07788 ymax: 40.11349
## Geodetic CRS: WGS 84
city_hall <- metro[metro$Station == 'City Hall', 6]
trolley <- st_read("https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272')
## Reading layer `Trolley_Stations' from data source
## `https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326'
## using driver `GeoJSON'
## Simple feature collection with 60 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.394 ymin: 39.90667 xmax: -75.16256 ymax: 39.96216
## Geodetic CRS: WGS 84
# why are some trolley stations missing? going southwest from 40th st station + going northwest from 33rd/34th station
trolley_renamed <- trolley %>%
rename(Station = StopName,
Route = LineAbbr,
Longitude = Lon,
Latitude = Lat) %>%
mutate(Type = "trolley")
# Combine both datasets into one
transit <- bind_rows(metro, trolley_renamed)
# PA hospitals (within 2.5 miles of philly bounds)
hospitals <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson") %>%
st_transform('EPSG:2272') %>%
st_intersection(st_buffer(philly_bounds, 13200))
## Reading layer `DOH_Hospitals202311' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/DOH_Hospitals202311.geojson'
## using driver `GeoJSON'
## Simple feature collection with 226 features and 14 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -80.49621 ymin: 39.75163 xmax: -74.86704 ymax: 42.13403
## Geodetic CRS: WGS 84
# farmers markets
farmersmarkets <- st_read("https://opendata.arcgis.com/datasets/0707c1f31e2446e881d680b0a5ee54bc_0.geojson") %>%
st_transform('EPSG:2272')
## Reading layer `Farmers_Markets' from data source
## `https://opendata.arcgis.com/datasets/0707c1f31e2446e881d680b0a5ee54bc_0.geojson'
## using driver `GeoJSON'
## Simple feature collection with 22 features and 53 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.22069 ymin: 39.92713 xmax: -75.05142 ymax: 40.06754
## Geodetic CRS: WGS 84
# checking out what Luming_property.csv looks like
luming_property <- read.csv("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/Luming_property.csv")
# looking at which properties were saved in luming_property -- Luming and Sijia's properties files are limited to the case study area
# luming_properties <- philly_properties %>%
# filter(objectid %in% luming_property$objectid)
# checking out what property_sijia_eda.geojson looks like
sijia_eda <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson")
## Reading layer `property_sijia_eda' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/Dataset/property_sijia_eda.geojson'
## using driver `GeoJSON'
## Simple feature collection with 2394 features and 98 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2692333 ymin: 236562 xmax: 2697665 ymax: 240121.5
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
# google API to get restaurant data
# incomplete, will figure this out later
# restaurant_results <- google_places(search_string = paste(search_terms, city, sep = " "),
# key = google_api)
# interstate highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "I"))
#
# # US highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "US"))
#
# # PA state highways in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(TRAF_RT_NO == "PA"))
#
# # all other roads in philly
# ggplot() +
# geom_sf(data = philly_bounds) +
# geom_sf(data = stateroads_inphilly %>%
# filter(!(TRAF_RT_NO %in% c("I", "US", "PA"))))
# I-95, US-1, I-676, and I-76
philly_highways_plot <-
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6"),
labels = c("US-1", "I-76", "I-95", "I-676")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways") +
theme_void()
philly_highways_plot
# ggsave("Output/HighwaysMap.png", plot = philly_highways_plot)
# looking at study area within philly bounds
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = studyarea, fill = "coral")
# philly parks
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent")
# philly schools
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = school, color = "brown4") # point data
# libraries
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = libraries, color = "#eb5600") # point data
# transit / looking at what this category actually consists of....
# looks like there's point geography for parking lots, some random transit stops, and heli pad station
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = facilities %>%
# filter(ASSET_GROUP1 == "A17",
# grepl("Parking", ASSET_SUBT1_DESC)), color = "pink") # point data
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = sijia_eda, color = "#7fe3d6")
# prison facilities
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = facilities %>%
# filter(ASSET_GROUP2 == "A13.3"), color = "lightgrey") # point data
# all philly properties
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_properties, color = "black")
For all Philly properties, calculate the distance from highway and see how it relates to price.
# taking distance calculator function (from functions.R in PPA textbook)
calculate_nearest_distance <- function(set_points, other_layer) {
nearest_idx <- st_nearest_feature(set_points, other_layer)
st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}
The following code chunk took too much to load (too many properties to consider), so I’m going to limit it to the properties within 500m (and possibly look at 1000m and 1500m) from highways to look at the immediate effects of highways on property prices.
# only keep properties that are within 500m (1640.42 ft) of highways of interest
buff500 <- 500 * 3.28084
highways_500buffer <- st_union(st_buffer(philly_mainhighways, buff500))
properties500 <- philly_properties[highways_500buffer,]
# only keep properties that are within 1000m (3280.84 ft) of highways of interest
buff1000 <- 1000 * 3.28084
highways_1000buffer <- st_union(st_buffer(philly_mainhighways, buff1000))
properties1000 <- philly_properties[highways_1000buffer,]
# visualizing buffer
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6")) +
labs(title = "Case Study of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
# visualizing properties to make sure we got the right ones
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "#57470a") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_1000buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties1000,
color = "#4a4a4a", size = .3) +
geom_sf(data = philly_mainhighways,
aes(color = ST_RT_NO)) +
# add custom colors
scale_color_manual(values = c("0001" = "#1a9988",
"0076" = "#eb5600",
"0095" = "#eba075",
"0676" = "#7fe3d6"),
labels = c("US-1", "I-76", "I-95", "I-676")) +
labs(title = "Properties within 1000m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Highways",
fill = "Highways") +
theme_void()
# checking how infaltion-adjusted sale prices look compared to raw sale price
philly_properties %>%
select(sale_date, sale_price, adjusted_sale_price) %>%
st_drop_geometry() %>%
pivot_longer(cols = 2:3,
names_to = "adj",
values_to = "price") %>%
mutate(adj = factor(adj, levels = c("sale_price", "adjusted_sale_price"))) %>%
ggplot(aes(x = sale_date, y = price)) +
geom_point() +
facet_wrap(~adj) +
labs(title = "Sale Price over Time") +
theme_minimal()
# adjusted_sale_price vs sale_price, colored by sale_date
ggplot(philly_properties,
aes(x = sale_price, y = adjusted_sale_price, color = sale_date)) +
geom_point() +
geom_smooth(method = "lm") +
theme_minimal()
It looks like the inflation adjusted sales prices are a bit higher than
the sales prices of 2024.
Are there differences between how price of property relates to dist_to_closest_highway depending on type of highway (whether highway cuts through neighborhood or not)?
# starting with 500m buffer
# need to check if there are 0 values in sale_price before log transforming
summary(properties500$sale_price) # 16,430 rows
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 30345 166000 249900 461129 395000 15500000
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 30345 166000 249900 461129 395000 15500000
summary(properties500$adjusted_sale_price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31569 190426 285105 529182 448071 19426645
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 31569 190426 285105 529182 448071 19426645
# using property data that has already been filtered
properties500_clean <- properties500 %>%
filter(total_area > 1) %>% # still keeping this filter for now
mutate(price_perTLA = adjusted_sale_price / total_livable_area,
price_perTA = adjusted_sale_price / total_area,
log_price = log(adjusted_sale_price),
log_price_perTLA = log(price_perTLA),
log_price_perTA = log(price_perTA),
category_easy = case_when(category_code_description %in% c("APARTMENTS > 4 UNITS",
"MIXED USE",
"MULTI FAMILY",
"SINGLE FAMILY") ~ "Residential or Mixed",
grepl("INDUS", category_code_description) ~ "Industrial",
grepl("HOTEL", category_code_description) ~ "Hotel",
grepl("OFFICES", category_code_description) ~ "Offices",
category_code_description %in% c("COMMERCIAL",
"GARAGE - COMMERCIAL",
"RETAIL") ~ "Commercial",
grepl("VACANT", category_code_description) ~ "Vacant",
.default = "Other"))
summary(properties500_clean$adjusted_sale_price) # 13,345 rows
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31569 190426 285105 529182 448071 19426645
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 31569 190426 285105 529182 448071 19426645
summary(properties500_clean$price_perTLA)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 8.232 149.884 220.229 Inf 310.111 Inf 404
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 8.232 149.884 220.229 Inf 310.111 Inf 404
summary(properties500_clean$price_perTA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.179 109.486 175.165 386.567 405.577 11539.259
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 1.179 109.486 175.165 386.567 405.577 11539.259
# correlations between total_area and total_livable_area, and sale_price
# cor(properties500_clean$total_area, properties500_clean$total_livable_area, use = "pairwise.complete.obs") # 0.5581733
# cor(properties500_clean$total_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.4357107
# cor(properties500_clean$total_livable_area, properties500_clean$sale_price, use = "pairwise.complete.obs") # 0.5772053
# histogram of sale_price
ggplot(data = properties500_clean) +
geom_histogram(aes(x = adjusted_sale_price),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Adjusted Sale Price")
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTA),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Sale Price per total area")
ggplot(data = properties500_clean) +
geom_histogram(aes(x = price_perTLA),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Sale Price per total livable area")
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Adjusted Sale Price (log-transformed)")
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price_perTA),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Sale Price per total area (log-transformed)")
ggplot(data = properties500_clean) +
geom_histogram(aes(x = log_price_perTLA),
bins = 100) +
theme_minimal() +
labs(title = "Distribution of Sale Price per total livable area (log-transformed)")
# map of properties within 500 m of highway, colored by sale_price
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = sale_price), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
# map of properties within 500 m of highway, colored by price_perTA
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = price_perTA), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
# ggplot() +
# geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
# geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
# geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
# geom_sf(data = highways_500buffer,
# color = "transparent", fill = "grey", alpha = 0.4) +
# geom_sf(data = properties500_clean,
# aes(color = price_perTLA), size = .3) +
# geom_sf(data = philly_mainhighways,
# color = "black") +
# labs(title = "Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
# subtitle = "US-1, I-76, I-95, and I-676") +
# theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "black", alpha = 0.2) +
geom_sf(data = properties500_clean,
aes(color = log_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price") +
theme_void()
# house sale price (category_easy == "Residential or Mixed")
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "black", alpha = 0.2) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price (per TA)") +
theme_void()
# same thing but residential or mixed use only
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price_perTA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Area) of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price (per TA)") +
theme_void()
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean,
aes(color = log_price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Log Price (per TLA)") +
theme_void()
# same thing but residential/mixed use only
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = properties500_clean %>%
filter(grepl("Resident", category_easy)),
aes(color = log_price_perTLA), size = .3) +
geom_sf(data = philly_mainhighways,
color = "black") +
labs(title = "Log Sale Price (per Total Livable Area) of Properties within 500m of Highways in Philly",
subtitle = "Residential or Mixed Use properties only\nUS-1, I-76, I-95, and I-676",
color = "Log Price (per TLA)") +
theme_void()
Calculate each property’s distance to highways.
# calculate each property's distance to highways of interest
# distance is in feet
# start_time <- Sys.time()
# prop500 <- properties500_clean %>%
# select(matches("assessment_date|building_code|category_|census_tract|geographic|zip|market_value|parcel|sale_|taxable|_area|year_built|objectid|geometry|price")) %>%
# mutate(dist_to_US001 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0001")),
# dist_to_I076 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0076")),
# dist_to_I095 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0095")),
# dist_to_I676 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0676")),
# dist_to_US001m = dist_to_US001 * 0.3048,
# dist_to_I076m = dist_to_I076 * 0.3048,
# dist_to_I095m = dist_to_I095 * 0.3048,
# dist_to_I676m = dist_to_I676 * 0.3048) # Time difference of 55.20697 secs
# find closest highway
# prop500_closest <- prop500 %>%
# select(matches("objectid|dist_to")) %>%
# pivot_longer(cols = 2:5,
# names_to = "highway",
# values_to = "distance") %>%
# group_by(objectid) %>%
# summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
# ungroup() %>%
# mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)
# join closest highway
# prop500 <- left_join(prop500,
# prop500_closest %>% st_drop_geometry(),
# by = "objectid") %>%
# mutate(closest_highway = case_when(
# dist_to_closest_highway == dist_to_US001 ~ "US-1",
# dist_to_closest_highway == dist_to_I076 ~ "I-76",
# dist_to_closest_highway == dist_to_I095 ~ "I-95",
# dist_to_closest_highway == dist_to_I676 ~ "I-676",
# .default = NA),
# closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
# highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
# closest_highway %in% c("I-95","I-76") ~ "along waterway",
# .default = NA))
# save file
# st_write(prop500, "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson")
# read in pre-saved file
prop500 <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson") %>%
mutate(closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")))
## Reading layer `philly_prop500_dst2hghwy_250318' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_prop500_dst2hghwy_250318.geojson'
## using driver `GeoJSON'
## Simple feature collection with 13343 features and 41 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2662223 ymin: 212357.3 xmax: 2742918 ymax: 299389.6
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
First, let’s looks at a faceted plot of the distribution of sales price for each highway separately
# prop500_faceted <-
# ggplot(data = prop500,
# aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
# geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
# geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
# # geom_point(alpha = .1, size = 0.3) +
# # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
# facet_wrap(~closest_highway) +
# labs(title = "Distribution of Property Sale Price",
# x = "Distance to Closest Highway (m)",
# y = "Log Sale Price (per TA)") +
# theme_minimal()
prop500_faceted <-
ggplot(data = prop500,
aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
geom_histogram(bins = 50) +
facet_wrap(~closest_highway, scales = "free_y") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Distribution of Property Sale Price",
x = "Log Sale Price (per Total Area)",
y = "Count",
color = "Closest Highway",
fill = "Closest Highway") +
theme_minimal()
prop500_faceted
# save image for slides
# ggsave("Output/SalePriceDistribution.png", plot = prop500_faceted)
# residential + mixed only
# prop500_faceted_res <-
# ggplot(data = prop500 %>%
# filter(grepl("Reside", category_easy)),
# aes(x = dist_to_closest_highwaym, y = log_price_perTA)) +
# geom_point(color = "#1a9988", alpha = .1, size = 0.3) +
# geom_smooth(color = "#eb5600", method = "lm", se = F, linewidth = 1.2) +
# # geom_point(alpha = .1, size = 0.3) +
# # geom_smooth(method = "lm", se = F, linewidth = 1.2) +
# # facet_wrap(closest_highway) +
# labs(title = "Distribution of Property Sale Price",
# x = "Distance to Closest Highway (m)",
# y = "Log Sale Price (per TA)") +
# theme_minimal()
prop500_faceted_res <-
ggplot(data = prop500 %>%
filter(grepl("Resid", category_easy)),
aes(x = log_price_perTA, color = closest_highway, fill = closest_highway)) +
geom_histogram(bins = 50) +
facet_wrap(~closest_highway, scales = "free_y") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Distribution of Property Sale Price",
subtitle = "Residential and Mixed Properties only",
x = "Log Sale Price (per Total Area)",
y = "Count",
color = "Closest Highway",
fill = "Closest Highway") +
theme_minimal()
prop500_faceted_res
# save image for slides
# ggsave("Output/SalePriceDistribution_residential.png", plot = prop500_faceted_res)
Now, look at relationship between distance to highway and price.
# map of properties within 500m of highway cored by which highway they're closest to
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887", color = "grey") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = highways_500buffer,
color = "transparent", fill = "grey", alpha = 0.4) +
geom_sf(data = prop500,
aes(color = closest_highway), size = 0.3) +
geom_sf(data = philly_mainhighways,
color = "black") +
# add custom colors
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Properties within 500m of Highways in Philly",
subtitle = "US-1, I-76, I-95, and I-676",
color = "Closest Highway") +
theme_void()
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = adjusted_sale_price,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price as a function of Distance to Closest Highway",
subtitle = "Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price ($)") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
cor_us1 <- round(cor(prop500 %>%
filter(grepl("US", closest_highway)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("US", closest_highway)) %>%
pull(dist_to_closest_highwaym)), 3)
cor_i76 <- round(cor(prop500 %>%
filter(grepl("I-76", closest_highway)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("I-76", closest_highway)) %>%
pull(dist_to_closest_highwaym)), 3)
cor_i95 <- round(cor(prop500 %>%
filter(grepl("I-95", closest_highway)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("I-95", closest_highway)) %>%
pull(dist_to_closest_highwaym)), 3)
cor_i676 <- round(cor(prop500 %>%
filter(grepl("I-676", closest_highway)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("I-676", closest_highway)) %>%
pull(dist_to_closest_highwaym)), 3)
scatter_4highways <-
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
geom_text(aes(label = cor_us1,
x = 150,
y = 1.25),
color = "#1a9988",
show.legend = F) +
geom_text(aes(label = cor_i76,
x = 350,
y = 1.25),
color = "#eb5600",
show.legend = F) +
geom_text(aes(label = cor_i95,
x = 350,
y = 8.75),
color = "#eba075",
show.legend = F) +
geom_text(aes(label = cor_i676,
x = 150,
y = 8.75),
color = "#7fe3d6",
show.legend = F) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
subtitle = "Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price (log price per TA)") +
theme_minimal()
scatter_4highways
# save image for slides
# ggsave("Output/PriceVSDist2Highway_4highways.png",
# plot = scatter_4highways,
# height = 7,
# width = 10)
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
# sale_price <= 1e7
ggplot(data = prop500 %>%
filter(sale_price <= 1e7),
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway",
subtitle = "Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Distance to Closest Highway (m)",
y = "Property Sale Price (log price per TA)") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and log(dist2closesthighway)
ggplot(data = prop500,
aes(x = log(dist_to_closest_highwaym), y = log_price_perTA,
color = closest_highway, fill = closest_highway)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
scale_fill_manual(values = c("US-1" = "#1a9988",
"I-76" = "#eb5600",
"I-95" = "#eba075",
"I-676" = "#7fe3d6")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Closest Highway (log)",
subtitle = "Properties within 500m of highways",
fill = "Closest Highway",
color = "Closest Highway",
x = "Log Distance to Closest Highway (m)",
y = "Log Property Sale Price (per TA)") +
theme_minimal()
# simple regression of price (log_price_perTA) and dist2closesthighway
fit1 <- lm(log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
summary(fit1)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1967 -0.6524 -0.1810 0.6549 4.0119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.433e+00 2.044e-02 265.807 < 2e-16 ***
## dist_to_closest_highwaym -3.010e-04 6.545e-05 -4.599 4.28e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.006 on 13341 degrees of freedom
## Multiple R-squared: 0.001583, Adjusted R-squared: 0.001508
## F-statistic: 21.15 on 1 and 13341 DF, p-value: 4.282e-06
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types)
fit2 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway, data = prop500)
summary(fit2)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6275 -0.4675 0.0543 0.4564 4.3238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.897e+00 1.937e-02 252.834 < 2e-16 ***
## dist_to_closest_highwaym -3.731e-04 5.678e-05 -6.571 5.19e-11 ***
## closest_highwayI-76 1.051e+00 2.884e-02 36.454 < 2e-16 ***
## closest_highwayI-676 1.743e+00 6.418e-02 27.164 < 2e-16 ***
## closest_highwayI-95 9.845e-01 1.575e-02 62.496 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8657 on 13338 degrees of freedom
## Multiple R-squared: 0.2606, Adjusted R-squared: 0.2604
## F-statistic: 1175 on 4 and 13338 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (4 types) with interaction
fit3 <- lm(log_price_perTA ~ dist_to_closest_highwaym + closest_highway + dist_to_closest_highwaym*closest_highway, data = prop500)
summary(fit3)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + closest_highway +
## dist_to_closest_highwaym * closest_highway, data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7897 -0.4624 0.0515 0.4524 4.3061
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.764e+00 2.581e-02 184.604
## dist_to_closest_highwaym 1.050e-04 8.353e-05 1.257
## closest_highwayI-76 7.358e-01 8.645e-02 8.512
## closest_highwayI-676 1.599e+00 1.313e-01 12.171
## closest_highwayI-95 1.303e+00 3.625e-02 35.940
## dist_to_closest_highwaym:closest_highwayI-76 8.623e-04 2.481e-04 3.476
## dist_to_closest_highwaym:closest_highwayI-676 7.507e-04 5.035e-04 1.491
## dist_to_closest_highwaym:closest_highwayI-95 -1.142e-03 1.173e-04 -9.742
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dist_to_closest_highwaym 0.208778
## closest_highwayI-76 < 2e-16 ***
## closest_highwayI-676 < 2e-16 ***
## closest_highwayI-95 < 2e-16 ***
## dist_to_closest_highwaym:closest_highwayI-76 0.000511 ***
## dist_to_closest_highwaym:closest_highwayI-676 0.135991
## dist_to_closest_highwaym:closest_highwayI-95 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8614 on 13335 degrees of freedom
## Multiple R-squared: 0.2681, Adjusted R-squared: 0.2678
## F-statistic: 697.9 on 7 and 13335 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types)
fit4 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type, data = prop500)
summary(fit4)
##
## Call:
## lm(formula = log_price_perTA ~ dist_to_closest_highwaym + highway_type,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6517 -0.4890 0.0290 0.4478 4.2732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.908e+00 1.968e-02 300.203 < 2e-16 ***
## dist_to_closest_highwaym -4.293e-04 5.791e-05 -7.413 1.31e-13 ***
## highway_typethrough neighborhood -9.423e-01 1.545e-02 -61.008 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8894 on 13340 degrees of freedom
## Multiple R-squared: 0.2194, Adjusted R-squared: 0.2193
## F-statistic: 1874 on 2 and 13340 DF, p-value: < 2.2e-16
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit5 <- lm(log_price_perTA ~ dist_to_closest_highwaym + highway_type + dist_to_closest_highwaym*highway_type, data = prop500)
sum5 <- summary(fit5)
data.frame(sum5$coefficients) %>%
mutate(Estimate = round(Estimate, 3),
StdErorr = round(Std..Error, 3),
PValue = round(Pr...t.., 3)) %>%
select(Estimate, StdErorr, PValue) %>%
kbl(align = c("l", rep("c", 3)),
caption = "Linear Regression: Sale Price ~ Distance to Closest Highway + Highway Type + Interaction") %>%
kable_styling() %>%
kable_classic(full_width = F, html_font = "Cambria")
| Estimate | StdErorr | PValue | |
|---|---|---|---|
| (Intercept) | 6.008 | 0.025 | 0 |
| dist_to_closest_highwaym | -0.001 | 0.000 | 0 |
| highway_typethrough neighborhood | -1.154 | 0.036 | 0 |
| dist_to_closest_highwaym:highway_typethrough neighborhood | 0.001 | 0.000 | 0 |
# multiple regression of price (log_price_perTA) and dist2closesthighway but adding highway type (2 types) with interaction
fit6 <- lm(log_price_perTA ~ log(dist_to_closest_highwaym) + highway_type + log(dist_to_closest_highwaym)*highway_type, data = prop500)
summary(fit6)
##
## Call:
## lm(formula = log_price_perTA ~ log(dist_to_closest_highwaym) +
## highway_type + log(dist_to_closest_highwaym) * highway_type,
## data = prop500)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8095 -0.4831 0.0275 0.4438 4.2556
##
## Coefficients:
## Estimate
## (Intercept) 6.53100
## log(dist_to_closest_highwaym) -0.13573
## highway_typethrough neighborhood -1.70492
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 0.13948
## Std. Error
## (Intercept) 0.09023
## log(dist_to_closest_highwaym) 0.01630
## highway_typethrough neighborhood 0.13197
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 0.02393
## t value Pr(>|t|)
## (Intercept) 72.385 < 2e-16
## log(dist_to_closest_highwaym) -8.328 < 2e-16
## highway_typethrough neighborhood -12.919 < 2e-16
## log(dist_to_closest_highwaym):highway_typethrough neighborhood 5.830 5.69e-09
##
## (Intercept) ***
## log(dist_to_closest_highwaym) ***
## highway_typethrough neighborhood ***
## log(dist_to_closest_highwaym):highway_typethrough neighborhood ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.889 on 13339 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.22
## F-statistic: 1256 on 3 and 13339 DF, p-value: < 2.2e-16
Visualizing the highway types (cutting through neighborhoods vs running alongside water).
# simple scatter of price and dist2closesthighway
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = adjusted_sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm") +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price and dist2closesthighway with loess method
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = sale_price,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "loess", se = FALSE, size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price as a function of Distance to Highway Type",
subtitle = "Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type") +
theme_minimal()
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
cor_thru <- round(cor(prop500 %>%
filter(grepl("through", highway_type)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("through", highway_type)) %>%
pull(dist_to_closest_highwaym)), 3)
cor_water <- round(cor(prop500 %>%
filter(grepl("along", highway_type)) %>%
pull(log_price_perTA),
prop500 %>%
filter(grepl("along", highway_type)) %>%
pull(dist_to_closest_highwaym)), 3)
scatter_2types <-
ggplot(data = prop500,
aes(x = dist_to_closest_highwaym, y = log_price_perTA,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
geom_text(aes(label = cor_thru,
x = 250,
y = 1),
color = "#1a9988",
show.legend = F) +
geom_text(aes(label = cor_water,
x = 250,
y = 9),
color = "#eb5600",
show.legend = F) +
labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
subtitle = "Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type",
x = "Distance to Closest Highway (m)",
y = "Log Sale Price (per TA)") +
theme_minimal()
scatter_2types
# save for slides
# ggsave("Output/PriceVSDist2Highway_2types.png",
# plot = scatter_2types,
# height = 7,
# width = 10)
# simple scatter of price (as log price per total area in sq ft) and dist2closesthighway
ggplot(data = prop500,
aes(x = log(dist_to_closest_highwaym), y = log_price_perTA,
color = highway_type, fill = highway_type)) +
geom_point(alpha = .2, size = 0.3) +
geom_smooth(method = "lm", size = 1.2) +
scale_color_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
scale_fill_manual(values = c("through neighborhood" = "#1a9988",
"along waterway" = "#eb5600")) +
labs(title = "Property Price (log price per TA) as a function of Distance to Highway Type",
subtitle = "Properties within 500m of highways",
fill = "Highway Type",
color = "Highway Type",
x = "Distance to Closest Highway (m)",
y = "Log Sale Price (per TA)") +
theme_minimal()
# correlations
prop500 %>%
st_drop_geometry() %>%
group_by(highway_type) %>%
summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>%
mutate(correlation = round(correlation, 3)) %>%
kbl(col.names = c("Highway Type", "Correlation"),
align = c("l", "c"),
caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway Types") %>%
kable_styling() %>%
kable_classic(full_width = F, html_font = "Cambria")
| Highway Type | Correlation |
|---|---|
| along waterway | -0.100 |
| through neighborhood | -0.006 |
# correlation of price and dist to highway for each highway
prop500 %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(dist_to_closest_highwaym, log_price_perTA, method = "pearson"), .groups = "drop") %>%
mutate(correlation = round(correlation, 3)) %>%
kbl(col.names = c("Highway", "Correlation"),
align = c("l", "c"),
caption = "Correlation Between Property Sale Price and Distance to Closest Highway by Highway") %>%
kable_styling() %>%
kable_classic(html_font = "Cambria")
| Highway | Correlation |
|---|---|
| US-1 | 0.023 |
| I-76 | 0.107 |
| I-676 | 0.147 |
| I-95 | -0.134 |
prop500 %>%
st_drop_geometry() %>%
group_by(closest_highway) %>%
summarize(correlation = cor(log(dist_to_closest_highwaym), log_price_perTA, method = "pearson"), .groups = "drop")
## # A tibble: 4 × 2
## closest_highway correlation
## <fct> <dbl>
## 1 US-1 0.0276
## 2 I-76 0.141
## 3 I-676 0.203
## 4 I-95 -0.117
We’re looking at all properties in Philadelphia (sale date within the last 5 years, aka between Jan. 1st, 2020 - Dec.31st, 2024; sale price is < $30,000 and > 5 SD from the mean sale price; total area is < 100 sq. ft.; inflation-adjusted prices).
I’m using log_price = log(adjusted_sale_price) as the outcome variable and including internal features, amenities, and fixed effects in addition to distance to highway.
# takes about 7 min to run
# cleaned_properties <- philly_properties %>%
# mutate(price_perTLA = sale_price / total_livable_area,
# price_perTA = sale_price / total_area,
# log_price = log(sale_price),
# log_price_perTLA = log(price_perTLA),
# log_price_perTA = log(price_perTA),
# category_easy = case_when(category_code_description %in% c("APARTMENTS > 4 UNITS",
# "MIXED USE",
# "MULTI FAMILY",
# "SINGLE FAMILY") ~ "Residential or Mixed",
# grepl("INDUS", category_code_description) ~ "Industrial",
# grepl("HOTEL", category_code_description) ~ "Hotel",
# grepl("OFFICES", category_code_description) ~ "Offices",
# category_code_description %in% c("COMMERCIAL",
# "GARAGE - COMMERCIAL",
# "RETAIL") ~ "Commercial",
# grepl("VACANT", category_code_description) ~ "Vacant",
# .default = "Other"),
# dist_to_US001 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0001")),
# dist_to_I076 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0076")),
# dist_to_I095 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0095")),
# dist_to_I676 = calculate_nearest_distance(geometry,
# philly_mainhighways %>%
# filter(ST_RT_NO == "0676")),
# dist_park = calculate_nearest_distance(geometry,
# philly_parks),
# dist_lib = calculate_nearest_distance(geometry,
# libraries),
# dist_transit = calculate_nearest_distance(geometry,
# transit),
# dist_school = calculate_nearest_distance(geometry,
# school),
# dist_cityhall = calculate_nearest_distance(geometry,
# city_hall),
# dist_hospital = calculate_nearest_distance(geometry,
# hospitals),
# dist_museum = calculate_nearest_distance(geometry,
# museums),
# dist_prison = calculate_nearest_distance(geometry,
# prisons),
# dist_historic = calculate_nearest_distance(geometry,
# historic),
# dist_to_US001m = dist_to_US001 * 0.3048,
# dist_to_I076m = dist_to_I076 * 0.3048,
# dist_to_I095m = dist_to_I095 * 0.3048,
# dist_to_I676m = dist_to_I676 * 0.3048,
# dist_parkm = dist_park * 0.3048,
# dist_libm = dist_lib * 0.3048,
# dist_transitm = dist_transit * 0.3048,
# dist_schoolm = dist_school * 0.3048,
# dist_cityhallm = dist_cityhall * 0.3048,
# dist_hospitalm = dist_hospital * 0.3048,
# dist_museumm = dist_museum * 0.3048,
# dist_prisonm = dist_prison * 0.3048,
# dist_historicm = dist_historic * 0.3048)
# find closest highway
# clean_closest <- cleaned_properties %>%
# select(matches("objectid|dist_to_US|dist_to_I")) %>%
# pivot_longer(cols = 2:5,
# names_to = "highway",
# values_to = "distance") %>%
# group_by(objectid) %>%
# summarise(dist_to_closest_highway = min(distance, na.rm = T)) %>%
# ungroup() %>%
# mutate(dist_to_closest_highwaym = dist_to_closest_highway * 0.3048)
# join closest highway
# cleaned_props <- left_join(cleaned_properties,
# clean_closest %>% st_drop_geometry(),
# by = "objectid") %>%
# mutate(closest_highway = case_when(
# dist_to_closest_highway == dist_to_US001 ~ "US-1",
# dist_to_closest_highway == dist_to_I076 ~ "I-76",
# dist_to_closest_highway == dist_to_I095 ~ "I-95",
# dist_to_closest_highway == dist_to_I676 ~ "I-676",
# .default = NA),
# closest_highway = factor(closest_highway, levels = c("US-1", "I-76", "I-676", "I-95")),
# highway_type = case_when(closest_highway %in% c("US-1","I-676") ~ "through neighborhood",
# closest_highway %in% c("I-95","I-76") ~ "along waterway",
# .default = NA)) %>%
# filter(!is.na(closest_highway))
# save for easier access later
# st_write(cleaned_props,
# "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities_250318.geojson")
# read in pre-saved data
cleaned_props <- st_read("~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson") %>%
mutate(year_built = as.numeric(year_built))
## Reading layer `philly_cleaned_properties_with_amenities' from data source
## `/Users/akiradisandro/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown/data/philly_cleaned_properties_with_amenities.geojson'
## using driver `GeoJSON'
## Simple feature collection with 244003 features and 117 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 2662223 ymin: 210016.8 xmax: 2749428 ymax: 304861
## Projected CRS: NAD83 / Pennsylvania South (ftUS)
Looking at correlation matrix of internal characteristics.
# model with no interactions
mult_reg <- lm(log_price ~ dist_to_closest_highwaym +
total_area + year_built + # internal characteristics
dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm + # external / amenities
factor(zip_code), # neighborhood effects (via zip code for now)
data = cleaned_props)
summary(mult_reg)
##
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym + total_area +
## year_built + dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm +
## dist_historicm + factor(zip_code), data = cleaned_props)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7283 -0.4026 0.0158 0.4234 5.9466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.298e+01 9.082e-02 142.874 < 2e-16 ***
## dist_to_closest_highwaym -3.267e-05 2.673e-06 -12.222 < 2e-16 ***
## total_area 1.406e-07 1.166e-08 12.059 < 2e-16 ***
## year_built 8.137e-04 3.015e-05 26.994 < 2e-16 ***
## dist_parkm 1.874e-04 1.013e-05 18.498 < 2e-16 ***
## dist_libm 2.197e-05 4.309e-06 5.098 3.44e-07 ***
## dist_cityhallm 3.409e-05 2.240e-06 15.222 < 2e-16 ***
## dist_hospitalm -1.537e-04 3.399e-06 -45.216 < 2e-16 ***
## dist_historicm 4.187e-05 2.987e-06 14.016 < 2e-16 ***
## factor(zip_code)19103 -5.293e-01 7.495e-02 -7.062 1.64e-12 ***
## factor(zip_code)19104 -2.508e+00 7.245e-02 -34.612 < 2e-16 ***
## factor(zip_code)19106 -9.430e-01 7.575e-02 -12.450 < 2e-16 ***
## factor(zip_code)19107 -7.211e-01 7.760e-02 -9.292 < 2e-16 ***
## factor(zip_code)19109 1.210e+00 4.842e-01 2.499 0.0124 *
## factor(zip_code)19111 -2.781e+00 7.655e-02 -36.327 < 2e-16 ***
## factor(zip_code)19112 -1.519e-02 2.415e-01 -0.063 0.9498
## factor(zip_code)19114 -2.656e+00 8.207e-02 -32.362 < 2e-16 ***
## factor(zip_code)19115 -2.283e+00 8.187e-02 -27.881 < 2e-16 ***
## factor(zip_code)19116 -2.431e+00 8.571e-02 -28.360 < 2e-16 ***
## factor(zip_code)19118 -1.376e+00 7.955e-02 -17.299 < 2e-16 ***
## factor(zip_code)19119 -2.011e+00 7.554e-02 -26.630 < 2e-16 ***
## factor(zip_code)19120 -3.360e+00 7.429e-02 -45.227 < 2e-16 ***
## factor(zip_code)19121 -2.424e+00 7.210e-02 -33.618 < 2e-16 ***
## factor(zip_code)19122 -2.315e+00 7.288e-02 -31.763 < 2e-16 ***
## factor(zip_code)19123 -1.583e+00 7.302e-02 -21.682 < 2e-16 ***
## factor(zip_code)19124 -3.329e+00 7.446e-02 -44.709 < 2e-16 ***
## factor(zip_code)19125 -2.248e+00 7.261e-02 -30.954 < 2e-16 ***
## factor(zip_code)19126 -2.739e+00 7.685e-02 -35.643 < 2e-16 ***
## factor(zip_code)19127 -2.059e+00 7.620e-02 -27.017 < 2e-16 ***
## factor(zip_code)19128 -2.220e+00 7.456e-02 -29.783 < 2e-16 ***
## factor(zip_code)19129 -2.342e+00 7.499e-02 -31.230 < 2e-16 ***
## factor(zip_code)19130 -1.373e+00 7.236e-02 -18.973 < 2e-16 ***
## factor(zip_code)19131 -2.987e+00 7.295e-02 -40.947 < 2e-16 ***
## factor(zip_code)19132 -3.667e+00 7.239e-02 -50.647 < 2e-16 ***
## factor(zip_code)19133 -3.645e+00 7.331e-02 -49.723 < 2e-16 ***
## factor(zip_code)19134 -3.428e+00 7.284e-02 -47.065 < 2e-16 ***
## factor(zip_code)19135 -2.807e+00 7.626e-02 -36.809 < 2e-16 ***
## factor(zip_code)19136 -2.781e+00 7.831e-02 -35.516 < 2e-16 ***
## factor(zip_code)19137 -2.765e+00 7.719e-02 -35.814 < 2e-16 ***
## factor(zip_code)19138 -3.059e+00 7.533e-02 -40.616 < 2e-16 ***
## factor(zip_code)19139 -3.058e+00 7.268e-02 -42.070 < 2e-16 ***
## factor(zip_code)19140 -3.754e+00 7.283e-02 -51.551 < 2e-16 ***
## factor(zip_code)19141 -3.218e+00 7.465e-02 -43.113 < 2e-16 ***
## factor(zip_code)19142 -3.112e+00 7.313e-02 -42.563 < 2e-16 ***
## factor(zip_code)19143 -2.897e+00 7.231e-02 -40.060 < 2e-16 ***
## factor(zip_code)19144 -2.792e+00 7.400e-02 -37.736 < 2e-16 ***
## factor(zip_code)19145 -2.512e+00 7.168e-02 -35.044 < 2e-16 ***
## factor(zip_code)19146 -2.039e+00 7.149e-02 -28.526 < 2e-16 ***
## factor(zip_code)19147 -1.607e+00 7.167e-02 -22.428 < 2e-16 ***
## factor(zip_code)19148 -2.420e+00 7.170e-02 -33.748 < 2e-16 ***
## factor(zip_code)19149 -2.888e+00 7.612e-02 -37.941 < 2e-16 ***
## factor(zip_code)19150 -2.483e+00 7.722e-02 -32.155 < 2e-16 ***
## factor(zip_code)19151 -2.843e+00 7.363e-02 -38.609 < 2e-16 ***
## factor(zip_code)19152 -2.676e+00 7.907e-02 -33.840 < 2e-16 ***
## factor(zip_code)19153 -2.426e+00 7.548e-02 -32.140 < 2e-16 ***
## factor(zip_code)19154 -2.879e+00 8.564e-02 -33.623 < 2e-16 ***
## factor(zip_code)19192 -1.055e+00 8.326e-01 -1.267 0.2053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8296 on 243946 degrees of freedom
## Multiple R-squared: 0.337, Adjusted R-squared: 0.3369
## F-statistic: 2215 on 56 and 243946 DF, p-value: < 2.2e-16
# model with interaction between dist to closest highway and ZIP
mult_reg_int <- lm(log_price ~ dist_to_closest_highwaym * factor(zip_code) +
total_area + year_built + # internal characteristics
dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm, # external / amenities
data = cleaned_props)
summary(mult_reg_int)
##
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym * factor(zip_code) +
## total_area + year_built + dist_parkm + dist_libm + dist_cityhallm +
## dist_hospitalm + dist_historicm, data = cleaned_props)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5339 -0.3954 0.0125 0.4030 5.6840
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 1.396e+01 2.073e-01 67.366
## dist_to_closest_highwaym -7.427e-04 1.806e-04 -4.112
## factor(zip_code)19103 -1.439e+00 2.051e-01 -7.017
## factor(zip_code)19104 -3.731e+00 2.017e-01 -18.493
## factor(zip_code)19106 -1.918e+00 2.037e-01 -9.417
## factor(zip_code)19107 -1.579e+00 2.105e-01 -7.500
## factor(zip_code)19109 1.131e+00 4.721e-01 2.397
## factor(zip_code)19111 -2.965e+00 2.028e-01 -14.616
## factor(zip_code)19112 -1.037e+00 6.905e-01 -1.502
## factor(zip_code)19114 -2.074e+00 2.073e-01 -10.005
## factor(zip_code)19115 -1.930e+00 2.066e-01 -9.341
## factor(zip_code)19116 -1.550e+00 2.100e-01 -7.382
## factor(zip_code)19118 -1.121e+00 2.985e-01 -3.757
## factor(zip_code)19119 -2.759e+00 2.178e-01 -12.671
## factor(zip_code)19120 -3.602e+00 2.010e-01 -17.924
## factor(zip_code)19121 -3.084e+00 2.021e-01 -15.254
## factor(zip_code)19122 -1.982e+00 2.033e-01 -9.751
## factor(zip_code)19123 -1.804e+00 2.018e-01 -8.938
## factor(zip_code)19124 -3.093e+00 2.019e-01 -15.316
## factor(zip_code)19125 -2.137e+00 2.004e-01 -10.663
## factor(zip_code)19126 -2.582e+00 2.286e-01 -11.292
## factor(zip_code)19127 -2.727e+00 2.140e-01 -12.742
## factor(zip_code)19128 -2.770e+00 2.011e-01 -13.769
## factor(zip_code)19129 -2.547e+00 2.038e-01 -12.495
## factor(zip_code)19130 -1.631e+00 2.053e-01 -7.942
## factor(zip_code)19131 -2.574e+00 2.019e-01 -12.750
## factor(zip_code)19132 -4.282e+00 2.027e-01 -21.124
## factor(zip_code)19133 -2.316e+00 2.266e-01 -10.218
## factor(zip_code)19134 -2.285e+00 2.009e-01 -11.373
## factor(zip_code)19135 -2.501e+00 2.033e-01 -12.301
## factor(zip_code)19136 -2.512e+00 2.051e-01 -12.249
## factor(zip_code)19137 -2.760e+00 2.048e-01 -13.473
## factor(zip_code)19138 -5.220e+00 2.076e-01 -25.140
## factor(zip_code)19139 -3.893e+00 2.088e-01 -18.646
## factor(zip_code)19140 -3.976e+00 2.009e-01 -19.792
## factor(zip_code)19141 -3.880e+00 2.024e-01 -19.175
## factor(zip_code)19142 -2.863e+00 2.158e-01 -13.266
## factor(zip_code)19143 -3.529e+00 2.015e-01 -17.519
## factor(zip_code)19144 -3.415e+00 2.013e-01 -16.959
## factor(zip_code)19145 -3.428e+00 2.008e-01 -17.072
## factor(zip_code)19146 -2.837e+00 2.003e-01 -14.167
## factor(zip_code)19147 -2.014e+00 2.001e-01 -10.064
## factor(zip_code)19148 -2.775e+00 2.002e-01 -13.862
## factor(zip_code)19149 -2.781e+00 2.023e-01 -13.748
## factor(zip_code)19150 -1.916e+00 2.489e-01 -7.697
## factor(zip_code)19151 -2.708e+00 2.023e-01 -13.382
## factor(zip_code)19152 -2.654e+00 2.044e-01 -12.982
## factor(zip_code)19153 -4.058e-01 2.194e-01 -1.850
## factor(zip_code)19154 -1.787e+00 2.127e-01 -8.403
## factor(zip_code)19192 -1.267e+00 8.126e-01 -1.559
## total_area 1.384e-07 1.136e-08 12.190
## year_built 7.406e-04 2.945e-05 25.143
## dist_parkm 1.458e-04 1.014e-05 14.379
## dist_libm 4.331e-05 4.557e-06 9.505
## dist_cityhallm -1.855e-05 2.818e-06 -6.582
## dist_hospitalm -2.067e-04 4.052e-06 -50.997
## dist_historicm -6.166e-05 3.713e-06 -16.609
## dist_to_closest_highwaym:factor(zip_code)19103 1.067e-03 1.952e-04 5.465
## dist_to_closest_highwaym:factor(zip_code)19104 1.401e-03 1.827e-04 7.668
## dist_to_closest_highwaym:factor(zip_code)19106 1.657e-03 2.076e-04 7.981
## dist_to_closest_highwaym:factor(zip_code)19107 8.703e-04 1.922e-04 4.528
## dist_to_closest_highwaym:factor(zip_code)19109 NA NA NA
## dist_to_closest_highwaym:factor(zip_code)19111 8.598e-04 1.809e-04 4.753
## dist_to_closest_highwaym:factor(zip_code)19112 1.755e-03 7.761e-04 2.261
## dist_to_closest_highwaym:factor(zip_code)19114 5.741e-04 1.817e-04 3.159
## dist_to_closest_highwaym:factor(zip_code)19115 7.756e-04 1.810e-04 4.284
## dist_to_closest_highwaym:factor(zip_code)19116 7.958e-04 1.811e-04 4.394
## dist_to_closest_highwaym:factor(zip_code)19118 6.837e-04 1.836e-04 3.725
## dist_to_closest_highwaym:factor(zip_code)19119 8.939e-04 1.817e-04 4.921
## dist_to_closest_highwaym:factor(zip_code)19120 9.434e-04 1.810e-04 5.212
## dist_to_closest_highwaym:factor(zip_code)19121 8.344e-04 1.815e-04 4.598
## dist_to_closest_highwaym:factor(zip_code)19122 1.657e-04 1.828e-04 0.906
## dist_to_closest_highwaym:factor(zip_code)19123 1.506e-04 1.893e-04 0.795
## dist_to_closest_highwaym:factor(zip_code)19124 6.653e-04 1.811e-04 3.674
## dist_to_closest_highwaym:factor(zip_code)19125 3.181e-04 1.818e-04 1.750
## dist_to_closest_highwaym:factor(zip_code)19126 7.213e-04 1.831e-04 3.939
## dist_to_closest_highwaym:factor(zip_code)19127 9.527e-04 1.832e-04 5.199
## dist_to_closest_highwaym:factor(zip_code)19128 8.400e-04 1.807e-04 4.648
## dist_to_closest_highwaym:factor(zip_code)19129 4.032e-04 1.929e-04 2.091
## dist_to_closest_highwaym:factor(zip_code)19130 3.469e-04 1.877e-04 1.848
## dist_to_closest_highwaym:factor(zip_code)19131 2.834e-04 1.814e-04 1.562
## dist_to_closest_highwaym:factor(zip_code)19132 8.471e-04 1.814e-04 4.670
## dist_to_closest_highwaym:factor(zip_code)19133 6.323e-05 1.856e-04 0.341
## dist_to_closest_highwaym:factor(zip_code)19134 -1.265e-04 1.809e-04 -0.699
## dist_to_closest_highwaym:factor(zip_code)19135 6.642e-04 1.827e-04 3.635
## dist_to_closest_highwaym:factor(zip_code)19136 6.627e-04 1.824e-04 3.632
## dist_to_closest_highwaym:factor(zip_code)19137 1.603e-03 1.951e-04 8.219
## dist_to_closest_highwaym:factor(zip_code)19138 1.305e-03 1.812e-04 7.203
## dist_to_closest_highwaym:factor(zip_code)19139 9.590e-04 1.820e-04 5.269
## dist_to_closest_highwaym:factor(zip_code)19140 6.053e-04 1.813e-04 3.339
## dist_to_closest_highwaym:factor(zip_code)19141 9.893e-04 1.813e-04 5.458
## dist_to_closest_highwaym:factor(zip_code)19142 5.484e-04 1.829e-04 2.998
## dist_to_closest_highwaym:factor(zip_code)19143 8.714e-04 1.810e-04 4.815
## dist_to_closest_highwaym:factor(zip_code)19144 9.529e-04 1.810e-04 5.266
## dist_to_closest_highwaym:factor(zip_code)19145 1.053e-03 1.817e-04 5.797
## dist_to_closest_highwaym:factor(zip_code)19146 9.243e-04 1.814e-04 5.096
## dist_to_closest_highwaym:factor(zip_code)19147 5.426e-04 1.815e-04 2.989
## dist_to_closest_highwaym:factor(zip_code)19148 6.664e-04 1.813e-04 3.676
## dist_to_closest_highwaym:factor(zip_code)19149 8.500e-04 1.823e-04 4.662
## dist_to_closest_highwaym:factor(zip_code)19150 6.773e-04 1.823e-04 3.715
## dist_to_closest_highwaym:factor(zip_code)19151 4.405e-04 1.817e-04 2.424
## dist_to_closest_highwaym:factor(zip_code)19152 8.300e-04 1.826e-04 4.546
## dist_to_closest_highwaym:factor(zip_code)19153 -4.377e-04 1.864e-04 -2.348
## dist_to_closest_highwaym:factor(zip_code)19154 5.907e-04 1.816e-04 3.253
## dist_to_closest_highwaym:factor(zip_code)19192 NA NA NA
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dist_to_closest_highwaym 3.92e-05 ***
## factor(zip_code)19103 2.28e-12 ***
## factor(zip_code)19104 < 2e-16 ***
## factor(zip_code)19106 < 2e-16 ***
## factor(zip_code)19107 6.41e-14 ***
## factor(zip_code)19109 0.016547 *
## factor(zip_code)19111 < 2e-16 ***
## factor(zip_code)19112 0.133123
## factor(zip_code)19114 < 2e-16 ***
## factor(zip_code)19115 < 2e-16 ***
## factor(zip_code)19116 1.57e-13 ***
## factor(zip_code)19118 0.000172 ***
## factor(zip_code)19119 < 2e-16 ***
## factor(zip_code)19120 < 2e-16 ***
## factor(zip_code)19121 < 2e-16 ***
## factor(zip_code)19122 < 2e-16 ***
## factor(zip_code)19123 < 2e-16 ***
## factor(zip_code)19124 < 2e-16 ***
## factor(zip_code)19125 < 2e-16 ***
## factor(zip_code)19126 < 2e-16 ***
## factor(zip_code)19127 < 2e-16 ***
## factor(zip_code)19128 < 2e-16 ***
## factor(zip_code)19129 < 2e-16 ***
## factor(zip_code)19130 1.99e-15 ***
## factor(zip_code)19131 < 2e-16 ***
## factor(zip_code)19132 < 2e-16 ***
## factor(zip_code)19133 < 2e-16 ***
## factor(zip_code)19134 < 2e-16 ***
## factor(zip_code)19135 < 2e-16 ***
## factor(zip_code)19136 < 2e-16 ***
## factor(zip_code)19137 < 2e-16 ***
## factor(zip_code)19138 < 2e-16 ***
## factor(zip_code)19139 < 2e-16 ***
## factor(zip_code)19140 < 2e-16 ***
## factor(zip_code)19141 < 2e-16 ***
## factor(zip_code)19142 < 2e-16 ***
## factor(zip_code)19143 < 2e-16 ***
## factor(zip_code)19144 < 2e-16 ***
## factor(zip_code)19145 < 2e-16 ***
## factor(zip_code)19146 < 2e-16 ***
## factor(zip_code)19147 < 2e-16 ***
## factor(zip_code)19148 < 2e-16 ***
## factor(zip_code)19149 < 2e-16 ***
## factor(zip_code)19150 1.39e-14 ***
## factor(zip_code)19151 < 2e-16 ***
## factor(zip_code)19152 < 2e-16 ***
## factor(zip_code)19153 0.064321 .
## factor(zip_code)19154 < 2e-16 ***
## factor(zip_code)19192 0.118919
## total_area < 2e-16 ***
## year_built < 2e-16 ***
## dist_parkm < 2e-16 ***
## dist_libm < 2e-16 ***
## dist_cityhallm 4.65e-11 ***
## dist_hospitalm < 2e-16 ***
## dist_historicm < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19103 4.64e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19104 1.76e-14 ***
## dist_to_closest_highwaym:factor(zip_code)19106 1.46e-15 ***
## dist_to_closest_highwaym:factor(zip_code)19107 5.95e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19109 NA
## dist_to_closest_highwaym:factor(zip_code)19111 2.01e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19112 0.023749 *
## dist_to_closest_highwaym:factor(zip_code)19114 0.001585 **
## dist_to_closest_highwaym:factor(zip_code)19115 1.83e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19116 1.12e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19118 0.000196 ***
## dist_to_closest_highwaym:factor(zip_code)19119 8.64e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19120 1.87e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19121 4.27e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19122 0.364812
## dist_to_closest_highwaym:factor(zip_code)19123 0.426412
## dist_to_closest_highwaym:factor(zip_code)19124 0.000239 ***
## dist_to_closest_highwaym:factor(zip_code)19125 0.080129 .
## dist_to_closest_highwaym:factor(zip_code)19126 8.19e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19127 2.00e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19128 3.35e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19129 0.036563 *
## dist_to_closest_highwaym:factor(zip_code)19130 0.064560 .
## dist_to_closest_highwaym:factor(zip_code)19131 0.118233
## dist_to_closest_highwaym:factor(zip_code)19132 3.02e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19133 0.733377
## dist_to_closest_highwaym:factor(zip_code)19134 0.484350
## dist_to_closest_highwaym:factor(zip_code)19135 0.000278 ***
## dist_to_closest_highwaym:factor(zip_code)19136 0.000281 ***
## dist_to_closest_highwaym:factor(zip_code)19137 < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19138 5.91e-13 ***
## dist_to_closest_highwaym:factor(zip_code)19139 1.37e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19140 0.000841 ***
## dist_to_closest_highwaym:factor(zip_code)19141 4.82e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19142 0.002718 **
## dist_to_closest_highwaym:factor(zip_code)19143 1.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19144 1.40e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19145 6.76e-09 ***
## dist_to_closest_highwaym:factor(zip_code)19146 3.47e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19147 0.002801 **
## dist_to_closest_highwaym:factor(zip_code)19148 0.000237 ***
## dist_to_closest_highwaym:factor(zip_code)19149 3.13e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19150 0.000204 ***
## dist_to_closest_highwaym:factor(zip_code)19151 0.015334 *
## dist_to_closest_highwaym:factor(zip_code)19152 5.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19153 0.018892 *
## dist_to_closest_highwaym:factor(zip_code)19154 0.001142 **
## dist_to_closest_highwaym:factor(zip_code)19192 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.808 on 243900 degrees of freedom
## Multiple R-squared: 0.3712, Adjusted R-squared: 0.371
## F-statistic: 1412 on 102 and 243900 DF, p-value: < 2.2e-16
Scaling all numeric variables to directly compare effect sizes.
scaled_vars <- cleaned_props %>%
select(log_price, dist_to_closest_highwaym, total_area, year_built, dist_parkm, dist_libm, dist_cityhallm, dist_hospitalm, dist_historicm, zip_code) %>%
mutate(across(c(dist_to_closest_highwaym, total_area, year_built, dist_parkm, dist_libm, dist_cityhallm, dist_hospitalm, dist_historicm), scale))
# model with scaled vars and no interactions
scaled_mult_reg <- lm(log_price ~ dist_to_closest_highwaym +
total_area + year_built + # internal characteristics
dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm + # external / amenities
factor(zip_code), # neighborhood effects (via zip code for now)
data = scaled_vars)
summary(scaled_mult_reg)
##
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym + total_area +
## year_built + dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm +
## dist_historicm + factor(zip_code), data = scaled_vars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7283 -0.4026 0.0158 0.4234 5.9466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.667004 0.072910 201.167 < 2e-16 ***
## dist_to_closest_highwaym -0.041383 0.003386 -12.222 < 2e-16 ***
## total_area 0.020262 0.001680 12.059 < 2e-16 ***
## year_built 0.046906 0.001738 26.994 < 2e-16 ***
## dist_parkm 0.036251 0.001960 18.498 < 2e-16 ***
## dist_libm 0.010648 0.002089 5.098 3.44e-07 ***
## dist_cityhallm 0.177281 0.011647 15.222 < 2e-16 ***
## dist_hospitalm -0.153718 0.003400 -45.216 < 2e-16 ***
## dist_historicm 0.053551 0.003821 14.016 < 2e-16 ***
## factor(zip_code)19103 -0.529311 0.074951 -7.062 1.64e-12 ***
## factor(zip_code)19104 -2.507668 0.072452 -34.612 < 2e-16 ***
## factor(zip_code)19106 -0.943043 0.075748 -12.450 < 2e-16 ***
## factor(zip_code)19107 -0.721091 0.077604 -9.292 < 2e-16 ***
## factor(zip_code)19109 1.210102 0.484179 2.499 0.0124 *
## factor(zip_code)19111 -2.780951 0.076553 -36.327 < 2e-16 ***
## factor(zip_code)19112 -0.015193 0.241531 -0.063 0.9498
## factor(zip_code)19114 -2.656065 0.082073 -32.362 < 2e-16 ***
## factor(zip_code)19115 -2.282731 0.081874 -27.881 < 2e-16 ***
## factor(zip_code)19116 -2.430872 0.085714 -28.360 < 2e-16 ***
## factor(zip_code)19118 -1.376155 0.079551 -17.299 < 2e-16 ***
## factor(zip_code)19119 -2.011487 0.075536 -26.630 < 2e-16 ***
## factor(zip_code)19120 -3.359982 0.074291 -45.227 < 2e-16 ***
## factor(zip_code)19121 -2.423843 0.072100 -33.618 < 2e-16 ***
## factor(zip_code)19122 -2.314892 0.072881 -31.763 < 2e-16 ***
## factor(zip_code)19123 -1.583329 0.073023 -21.682 < 2e-16 ***
## factor(zip_code)19124 -3.329156 0.074463 -44.709 < 2e-16 ***
## factor(zip_code)19125 -2.247696 0.072614 -30.954 < 2e-16 ***
## factor(zip_code)19126 -2.739012 0.076846 -35.643 < 2e-16 ***
## factor(zip_code)19127 -2.058590 0.076196 -27.017 < 2e-16 ***
## factor(zip_code)19128 -2.220476 0.074555 -29.783 < 2e-16 ***
## factor(zip_code)19129 -2.342086 0.074994 -31.230 < 2e-16 ***
## factor(zip_code)19130 -1.372904 0.072362 -18.973 < 2e-16 ***
## factor(zip_code)19131 -2.987252 0.072954 -40.947 < 2e-16 ***
## factor(zip_code)19132 -3.666521 0.072394 -50.647 < 2e-16 ***
## factor(zip_code)19133 -3.644950 0.073305 -49.723 < 2e-16 ***
## factor(zip_code)19134 -3.428068 0.072836 -47.065 < 2e-16 ***
## factor(zip_code)19135 -2.806929 0.076256 -36.809 < 2e-16 ***
## factor(zip_code)19136 -2.781071 0.078306 -35.516 < 2e-16 ***
## factor(zip_code)19137 -2.764584 0.077193 -35.814 < 2e-16 ***
## factor(zip_code)19138 -3.059442 0.075325 -40.616 < 2e-16 ***
## factor(zip_code)19139 -3.057536 0.072677 -42.070 < 2e-16 ***
## factor(zip_code)19140 -3.754406 0.072829 -51.551 < 2e-16 ***
## factor(zip_code)19141 -3.218457 0.074651 -43.113 < 2e-16 ***
## factor(zip_code)19142 -3.112464 0.073126 -42.563 < 2e-16 ***
## factor(zip_code)19143 -2.896724 0.072309 -40.060 < 2e-16 ***
## factor(zip_code)19144 -2.792299 0.073995 -37.736 < 2e-16 ***
## factor(zip_code)19145 -2.512029 0.071683 -35.044 < 2e-16 ***
## factor(zip_code)19146 -2.039442 0.071495 -28.526 < 2e-16 ***
## factor(zip_code)19147 -1.607364 0.071667 -22.428 < 2e-16 ***
## factor(zip_code)19148 -2.419911 0.071705 -33.748 < 2e-16 ***
## factor(zip_code)19149 -2.887972 0.076118 -37.941 < 2e-16 ***
## factor(zip_code)19150 -2.482971 0.077218 -32.155 < 2e-16 ***
## factor(zip_code)19151 -2.842632 0.073626 -38.609 < 2e-16 ***
## factor(zip_code)19152 -2.675783 0.079072 -33.840 < 2e-16 ***
## factor(zip_code)19153 -2.426069 0.075484 -32.140 < 2e-16 ***
## factor(zip_code)19154 -2.879380 0.085636 -33.623 < 2e-16 ***
## factor(zip_code)19192 -1.054654 0.832615 -1.267 0.2053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8296 on 243946 degrees of freedom
## Multiple R-squared: 0.337, Adjusted R-squared: 0.3369
## F-statistic: 2215 on 56 and 243946 DF, p-value: < 2.2e-16
# model with scaled vars and interaction between dist to closest highway and ZIP
scaled_mult_reg_int <- lm(log_price ~ dist_to_closest_highwaym * factor(zip_code) +
total_area + year_built + # internal characteristics
dist_parkm + dist_libm + dist_cityhallm + dist_hospitalm + dist_historicm, # external / amenities
data = scaled_vars)
summary(scaled_mult_reg_int)
##
## Call:
## lm(formula = log_price ~ dist_to_closest_highwaym * factor(zip_code) +
## total_area + year_built + dist_parkm + dist_libm + dist_cityhallm +
## dist_hospitalm + dist_historicm, data = scaled_vars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5339 -0.3954 0.0125 0.4030 5.6840
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 13.572930 0.133979 101.307
## dist_to_closest_highwaym -0.940615 0.228746 -4.112
## factor(zip_code)19103 0.330072 0.156196 2.113
## factor(zip_code)19104 -1.408350 0.134142 -10.499
## factor(zip_code)19106 0.829401 0.191028 4.342
## factor(zip_code)19107 -0.135495 0.143970 -0.941
## factor(zip_code)19109 1.131406 0.472079 2.397
## factor(zip_code)19111 -1.538953 0.136868 -11.244
## factor(zip_code)19112 1.873142 0.682042 2.746
## factor(zip_code)19114 -1.122219 0.141755 -7.917
## factor(zip_code)19115 -0.643690 0.141431 -4.551
## factor(zip_code)19116 -0.230560 0.145248 -1.587
## factor(zip_code)19118 0.012282 0.214759 0.057
## factor(zip_code)19119 -1.276944 0.144665 -8.827
## factor(zip_code)19120 -2.037583 0.135467 -15.041
## factor(zip_code)19121 -1.699964 0.132979 -12.784
## factor(zip_code)19122 -1.707426 0.133782 -12.763
## factor(zip_code)19123 -1.554385 0.148651 -10.457
## factor(zip_code)19124 -1.989688 0.134910 -14.748
## factor(zip_code)19125 -1.609034 0.134899 -11.928
## factor(zip_code)19126 -1.385400 0.147894 -9.368
## factor(zip_code)19127 -1.147277 0.137465 -8.346
## factor(zip_code)19128 -1.376683 0.134861 -10.208
## factor(zip_code)19129 -1.878192 0.155389 -12.087
## factor(zip_code)19130 -1.055539 0.137972 -7.650
## factor(zip_code)19131 -2.103911 0.133673 -15.739
## factor(zip_code)19132 -2.877606 0.133360 -21.578
## factor(zip_code)19133 -2.210926 0.138348 -15.981
## factor(zip_code)19134 -2.494904 0.133763 -18.652
## factor(zip_code)19135 -1.399868 0.138517 -10.106
## factor(zip_code)19136 -1.413564 0.139262 -10.150
## factor(zip_code)19137 -0.101344 0.163125 -0.621
## factor(zip_code)19138 -3.055422 0.138189 -22.110
## factor(zip_code)19139 -2.302824 0.135429 -17.004
## factor(zip_code)19140 -2.972104 0.133784 -22.216
## factor(zip_code)19141 -2.239746 0.135042 -16.586
## factor(zip_code)19142 -1.953265 0.137641 -14.191
## factor(zip_code)19143 -2.084560 0.133395 -15.627
## factor(zip_code)19144 -1.834627 0.134525 -13.638
## factor(zip_code)19145 -1.681240 0.133048 -12.636
## factor(zip_code)19146 -1.304594 0.132816 -9.823
## factor(zip_code)19147 -1.113810 0.133341 -8.353
## factor(zip_code)19148 -1.669698 0.133081 -12.546
## factor(zip_code)19149 -1.371280 0.139817 -9.808
## factor(zip_code)19150 -0.792540 0.171537 -4.620
## factor(zip_code)19151 -1.977272 0.134252 -14.728
## factor(zip_code)19152 -1.277553 0.141465 -9.031
## factor(zip_code)19153 -1.131578 0.135705 -8.338
## factor(zip_code)19154 -0.807611 0.145804 -5.539
## factor(zip_code)19192 -1.267090 0.812586 -1.559
## total_area 0.019950 0.001637 12.190
## year_built 0.042689 0.001698 25.143
## dist_parkm 0.028203 0.001961 14.379
## dist_libm 0.020993 0.002209 9.505
## dist_cityhallm -0.096456 0.014654 -6.582
## dist_hospitalm -0.206689 0.004053 -50.997
## dist_historicm -0.078868 0.004749 -16.609
## dist_to_closest_highwaym:factor(zip_code)19103 1.351306 0.247277 5.465
## dist_to_closest_highwaym:factor(zip_code)19104 1.773832 0.231336 7.668
## dist_to_closest_highwaym:factor(zip_code)19106 2.098450 0.262941 7.981
## dist_to_closest_highwaym:factor(zip_code)19107 1.102255 0.243422 4.528
## dist_to_closest_highwaym:factor(zip_code)19109 NA NA NA
## dist_to_closest_highwaym:factor(zip_code)19111 1.088888 0.229114 4.753
## dist_to_closest_highwaym:factor(zip_code)19112 2.222729 0.982994 2.261
## dist_to_closest_highwaym:factor(zip_code)19114 0.727050 0.230173 3.159
## dist_to_closest_highwaym:factor(zip_code)19115 0.982316 0.229281 4.284
## dist_to_closest_highwaym:factor(zip_code)19116 1.007892 0.229397 4.394
## dist_to_closest_highwaym:factor(zip_code)19118 0.865879 0.232468 3.725
## dist_to_closest_highwaym:factor(zip_code)19119 1.132126 0.230082 4.921
## dist_to_closest_highwaym:factor(zip_code)19120 1.194875 0.229270 5.212
## dist_to_closest_highwaym:factor(zip_code)19121 1.056813 0.229853 4.598
## dist_to_closest_highwaym:factor(zip_code)19122 0.209839 0.231550 0.906
## dist_to_closest_highwaym:factor(zip_code)19123 0.190724 0.239799 0.795
## dist_to_closest_highwaym:factor(zip_code)19124 0.842642 0.229341 3.674
## dist_to_closest_highwaym:factor(zip_code)19125 0.402901 0.230236 1.750
## dist_to_closest_highwaym:factor(zip_code)19126 0.913589 0.231940 3.939
## dist_to_closest_highwaym:factor(zip_code)19127 1.206601 0.232072 5.199
## dist_to_closest_highwaym:factor(zip_code)19128 1.063832 0.228874 4.648
## dist_to_closest_highwaym:factor(zip_code)19129 0.510664 0.244264 2.091
## dist_to_closest_highwaym:factor(zip_code)19130 0.439416 0.237740 1.848
## dist_to_closest_highwaym:factor(zip_code)19131 0.358948 0.229765 1.562
## dist_to_closest_highwaym:factor(zip_code)19132 1.072811 0.229741 4.670
## dist_to_closest_highwaym:factor(zip_code)19133 0.080086 0.235106 0.341
## dist_to_closest_highwaym:factor(zip_code)19134 -0.160232 0.229124 -0.699
## dist_to_closest_highwaym:factor(zip_code)19135 0.841173 0.231404 3.635
## dist_to_closest_highwaym:factor(zip_code)19136 0.839274 0.231057 3.632
## dist_to_closest_highwaym:factor(zip_code)19137 2.030485 0.247053 8.219
## dist_to_closest_highwaym:factor(zip_code)19138 1.652908 0.229476 7.203
## dist_to_closest_highwaym:factor(zip_code)19139 1.214585 0.230514 5.269
## dist_to_closest_highwaym:factor(zip_code)19140 0.766626 0.229588 3.339
## dist_to_closest_highwaym:factor(zip_code)19141 1.253000 0.229576 5.458
## dist_to_closest_highwaym:factor(zip_code)19142 0.694584 0.231687 2.998
## dist_to_closest_highwaym:factor(zip_code)19143 1.103604 0.229197 4.815
## dist_to_closest_highwaym:factor(zip_code)19144 1.206826 0.229182 5.266
## dist_to_closest_highwaym:factor(zip_code)19145 1.333953 0.230112 5.797
## dist_to_closest_highwaym:factor(zip_code)19146 1.170615 0.229708 5.096
## dist_to_closest_highwaym:factor(zip_code)19147 0.687202 0.229927 2.989
## dist_to_closest_highwaym:factor(zip_code)19148 0.844057 0.229632 3.676
## dist_to_closest_highwaym:factor(zip_code)19149 1.076480 0.230911 4.662
## dist_to_closest_highwaym:factor(zip_code)19150 0.857834 0.230941 3.715
## dist_to_closest_highwaym:factor(zip_code)19151 0.557854 0.230098 2.424
## dist_to_closest_highwaym:factor(zip_code)19152 1.051156 0.231227 4.546
## dist_to_closest_highwaym:factor(zip_code)19153 -0.554331 0.236120 -2.348
## dist_to_closest_highwaym:factor(zip_code)19154 0.748083 0.229961 3.253
## dist_to_closest_highwaym:factor(zip_code)19192 NA NA NA
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## dist_to_closest_highwaym 3.92e-05 ***
## factor(zip_code)19103 0.034585 *
## factor(zip_code)19104 < 2e-16 ***
## factor(zip_code)19106 1.41e-05 ***
## factor(zip_code)19107 0.346637
## factor(zip_code)19109 0.016547 *
## factor(zip_code)19111 < 2e-16 ***
## factor(zip_code)19112 0.006026 **
## factor(zip_code)19114 2.45e-15 ***
## factor(zip_code)19115 5.33e-06 ***
## factor(zip_code)19116 0.112433
## factor(zip_code)19118 0.954396
## factor(zip_code)19119 < 2e-16 ***
## factor(zip_code)19120 < 2e-16 ***
## factor(zip_code)19121 < 2e-16 ***
## factor(zip_code)19122 < 2e-16 ***
## factor(zip_code)19123 < 2e-16 ***
## factor(zip_code)19124 < 2e-16 ***
## factor(zip_code)19125 < 2e-16 ***
## factor(zip_code)19126 < 2e-16 ***
## factor(zip_code)19127 < 2e-16 ***
## factor(zip_code)19128 < 2e-16 ***
## factor(zip_code)19129 < 2e-16 ***
## factor(zip_code)19130 2.01e-14 ***
## factor(zip_code)19131 < 2e-16 ***
## factor(zip_code)19132 < 2e-16 ***
## factor(zip_code)19133 < 2e-16 ***
## factor(zip_code)19134 < 2e-16 ***
## factor(zip_code)19135 < 2e-16 ***
## factor(zip_code)19136 < 2e-16 ***
## factor(zip_code)19137 0.534423
## factor(zip_code)19138 < 2e-16 ***
## factor(zip_code)19139 < 2e-16 ***
## factor(zip_code)19140 < 2e-16 ***
## factor(zip_code)19141 < 2e-16 ***
## factor(zip_code)19142 < 2e-16 ***
## factor(zip_code)19143 < 2e-16 ***
## factor(zip_code)19144 < 2e-16 ***
## factor(zip_code)19145 < 2e-16 ***
## factor(zip_code)19146 < 2e-16 ***
## factor(zip_code)19147 < 2e-16 ***
## factor(zip_code)19148 < 2e-16 ***
## factor(zip_code)19149 < 2e-16 ***
## factor(zip_code)19150 3.84e-06 ***
## factor(zip_code)19151 < 2e-16 ***
## factor(zip_code)19152 < 2e-16 ***
## factor(zip_code)19153 < 2e-16 ***
## factor(zip_code)19154 3.04e-08 ***
## factor(zip_code)19192 0.118919
## total_area < 2e-16 ***
## year_built < 2e-16 ***
## dist_parkm < 2e-16 ***
## dist_libm < 2e-16 ***
## dist_cityhallm 4.65e-11 ***
## dist_hospitalm < 2e-16 ***
## dist_historicm < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19103 4.64e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19104 1.76e-14 ***
## dist_to_closest_highwaym:factor(zip_code)19106 1.46e-15 ***
## dist_to_closest_highwaym:factor(zip_code)19107 5.95e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19109 NA
## dist_to_closest_highwaym:factor(zip_code)19111 2.01e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19112 0.023749 *
## dist_to_closest_highwaym:factor(zip_code)19114 0.001585 **
## dist_to_closest_highwaym:factor(zip_code)19115 1.83e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19116 1.12e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19118 0.000196 ***
## dist_to_closest_highwaym:factor(zip_code)19119 8.64e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19120 1.87e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19121 4.27e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19122 0.364812
## dist_to_closest_highwaym:factor(zip_code)19123 0.426412
## dist_to_closest_highwaym:factor(zip_code)19124 0.000239 ***
## dist_to_closest_highwaym:factor(zip_code)19125 0.080129 .
## dist_to_closest_highwaym:factor(zip_code)19126 8.19e-05 ***
## dist_to_closest_highwaym:factor(zip_code)19127 2.00e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19128 3.35e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19129 0.036563 *
## dist_to_closest_highwaym:factor(zip_code)19130 0.064560 .
## dist_to_closest_highwaym:factor(zip_code)19131 0.118233
## dist_to_closest_highwaym:factor(zip_code)19132 3.02e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19133 0.733377
## dist_to_closest_highwaym:factor(zip_code)19134 0.484350
## dist_to_closest_highwaym:factor(zip_code)19135 0.000278 ***
## dist_to_closest_highwaym:factor(zip_code)19136 0.000281 ***
## dist_to_closest_highwaym:factor(zip_code)19137 < 2e-16 ***
## dist_to_closest_highwaym:factor(zip_code)19138 5.91e-13 ***
## dist_to_closest_highwaym:factor(zip_code)19139 1.37e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19140 0.000841 ***
## dist_to_closest_highwaym:factor(zip_code)19141 4.82e-08 ***
## dist_to_closest_highwaym:factor(zip_code)19142 0.002718 **
## dist_to_closest_highwaym:factor(zip_code)19143 1.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19144 1.40e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19145 6.76e-09 ***
## dist_to_closest_highwaym:factor(zip_code)19146 3.47e-07 ***
## dist_to_closest_highwaym:factor(zip_code)19147 0.002801 **
## dist_to_closest_highwaym:factor(zip_code)19148 0.000237 ***
## dist_to_closest_highwaym:factor(zip_code)19149 3.13e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19150 0.000204 ***
## dist_to_closest_highwaym:factor(zip_code)19151 0.015334 *
## dist_to_closest_highwaym:factor(zip_code)19152 5.47e-06 ***
## dist_to_closest_highwaym:factor(zip_code)19153 0.018892 *
## dist_to_closest_highwaym:factor(zip_code)19154 0.001142 **
## dist_to_closest_highwaym:factor(zip_code)19192 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.808 on 243900 degrees of freedom
## Multiple R-squared: 0.3712, Adjusted R-squared: 0.371
## F-statistic: 1412 on 102 and 243900 DF, p-value: < 2.2e-16
First, I filtered the property data in the following ways.
Keep properties whose sale date is between Jan. 1st, 2020 - Dec.31st, 2024
Adjust sales prices for inflation
Remove properties whose sale price is < $30,000 and > 5 SD from the mean sale price
Remove properties whose total area is < 100 sq. ft.
Then, I calculated each property’s distance to the four highways of interest as well as amenities. The amenities that we’ve looked into so far are parks, libraries, transit stops, schools, City Hall (proxy for the central business district), hospitals, museums, and historic sites. We also included distance to prisons as a disamenity.
We still need to look at sale price’s relationship with restaurants and grocery stores (I just need to figure out how to use the google API to do this).
# plot of log_price_perTA
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = cleaned_props, aes(color = log_price_perTA), size = 0.2) +
scale_color_gradient(high = "#eb5600",
low = "#f7cfb7") +
labs(title = "Property Price (log sale price per total area)",
color = "Log Price") +
theme_void()
I’m going to go through the following columns of interest (amenity predictors) and make a map + scatterplot and get the Pearson correlation coefficient between the amenity/disamenity + sale price (log_price + log_price_perTA).
dist_to_closest_highwaym
dist_parkm
dist_libm
dist_transitm
dist_schoolm
dist_cityhallm
dist_hospitalm
dist_museumm
dist_prisonm
dist_historicm
# only keeping two outcomes (log_price + log_price_perTA) and all amenities
vars_of_int <- cleaned_props %>%
select(log_price, log_price_perTA,
dist_to_closest_highwaym, dist_parkm, dist_libm, dist_transitm, dist_schoolm, dist_cityhallm, dist_hospitalm, dist_museumm, dist_prisonm, dist_historicm)
# cor matrix for corplot
cors_logprice <- cor(vars_of_int %>%
select(-log_price_perTA) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
cors_logpriceperTA <- cor(vars_of_int %>%
select(-log_price) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
# significance test
sigtest_logprice <- cor.mtest(vars_of_int %>%
select(-log_price_perTA) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
sigtest_logpriceperTA <- cor.mtest(vars_of_int %>%
select(-log_price) %>%
rename(dist_2clssthghwy = dist_to_closest_highwaym) %>%
st_drop_geometry())
# pretty correlation matrix of all continuous variables
corrplot(cors_logprice,
title = "Correlation Matrix for Log Price",
p.mat = sigtest_logprice$p,
method = 'circle',
type = 'lower',
insig='blank',
addCoef.col ='black',
number.cex = 0.8,
diag=FALSE)
corrplot(cors_logpriceperTA,
title = "Correlation Matrix for Log Price",
p.mat = sigtest_logpriceperTA$p,
method = 'circle',
type = 'lower',
insig='blank',
addCoef.col ='black',
number.cex = 0.8,
diag=FALSE)
From the correlation matrix, we can see that distance to City Hall, distance to Transit, and distance to museums are all highly collinear with correlation coefficients ranging from .78 (transit and museum) to .98 (museum and city hall). Interestingly, distance to city hall and distance to museum are the variables with the correlation coefficient of highest magnitude with log_price_perTA (at -.38 and -.34 respectively), but distance to transit is not as correlated to the outcome variable (-.18).
When considering all properties in Philadelphia (after cleaning this data a bit), we see that distance to closest highway now has a negative relationship with property sale price (-0.18).
I’m moving forward with log_price_perTA as outcome for now because the size of the property explains a lot of the variance in property sale price already and right now we are focused on external predictors.
dat <- vars_of_int %>%
select(log_price_perTA, dist_parkm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"
cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_parkm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 2000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Park (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_parkm)) +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent", alpha = 0.75) +
labs(title = "Distance to nearest park",
color = "Dist. to park") +
theme_void()
Looking at distance to park without the extreme outlier that is almost
2500m from the nearest park
dat <- vars_of_int %>%
select(log_price_perTA, dist_parkm) %>%
filter(dist_parkm < 2000)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Park"
cor_toplot <- round(cor(dat$dist_parkm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_parkm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 1500,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
subtitle = "Removed outlier",
x = "Distance to nearest Park (m)",
y = "Log Price (per total area)") +
theme_minimal()
Removing the outlier had no effect on the correlation.
dat <- vars_of_int %>%
select(log_price_perTA, dist_libm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Library"
cor_toplot <- round(cor(dat$dist_libm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_libm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 3000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Library (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_libm)) +
geom_sf(data = libraries, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Library",
color = "Dist. to Lib.") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_transitm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Transit Stop"
cor_toplot <- round(cor(dat$dist_transitm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_transitm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 10000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Transit Stop (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_transitm)) +
geom_sf(data = transit, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Transit stop",
color = "Dist. to station") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_schoolm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest School"
cor_toplot <- round(cor(dat$dist_schoolm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_schoolm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 2000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest School (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_schoolm)) +
geom_sf(data = school, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest School",
color = "Dist. to School") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_cityhallm)
scat_title <- "Log Price (per total area) as a function of Distance to City Hall"
cor_toplot <- round(cor(dat$dist_cityhallm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_cityhallm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 20000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to City Hall (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_cityhallm)) +
geom_sf(data = city_hall, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to City Hall",
color = "Dist. to CH") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_hospitalm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Hospital"
cor_toplot <- round(cor(dat$dist_hospitalm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_hospitalm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 5000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Hospital (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_hospitalm)) +
geom_sf(data = hospitals, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Hospital",
color = "Dist. to Hospital") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_museumm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Museum"
cor_toplot <- round(cor(dat$dist_museumm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_museumm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 15000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Museum (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_museumm)) +
geom_sf(data = museums, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Museum",
color = "Dist. to Museum") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_prisonm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Prison"
cor_toplot <- round(cor(dat$dist_prisonm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_prisonm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 9000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Prison (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_prisonm)) +
geom_sf(data = prisons, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to nearest Prison",
color = "Dist. to Prison") +
theme_void()
dat <- vars_of_int %>%
select(log_price_perTA, dist_historicm)
scat_title <- "Log Price (per total area) as a function of Distance to nearest Historic Site"
cor_toplot <- round(cor(dat$dist_historicm, dat$log_price_perTA), 3)
# scatter
ggplot(dat,
aes(x = dist_historicm,
y = log_price_perTA)) +
geom_point(size = 0.2, alpha = 0.3, color = "#1a9988") +
geom_smooth(method = "lm", color = "#eb5600", fill = "#eba075") +
geom_text(aes(label = cor_toplot,
x = 6000,
y = 10,
color = "#eb5600"),
show.legend = F) +
labs(title = scat_title,
x = "Distance to nearest Historic Site (m)",
y = "Log Price (per total area)") +
theme_minimal()
# map
ggplot() +
geom_sf(data = philly_bounds, fill = "#c9b887") +
geom_sf(data = hydro, fill = "#7d97c7", color = "transparent") +
geom_sf(data = philly_parks, fill = "#6b9169", color = "transparent") +
geom_sf(data = dat, size = 0.2,
aes(color = dist_historicm)) +
geom_sf(data = historic, color = "#faef73") +
scale_color_gradient(high = "#f7cfb7",
low = "#eb5600") +
labs(title = "Distance to Historic Site",
color = "Dist. to Hist.") +
theme_void()